-
Notifications
You must be signed in to change notification settings - Fork 14.7k
[VectorCombine] Shrink loads used in shufflevector rebroadcasts #128938
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-vectorizers @llvm/pr-subscribers-llvm-transforms Author: Leon Clark (PeddleSpam) Changes: Attempt to shrink the size of vector loads where only some of the incoming lanes are used for rebroadcasts in shufflevector instructions. Full diff: https://github.com/llvm/llvm-project/pull/128938.diff 1 Files Affected:
diff --git a/llvm/test/Transforms/AggressiveInstCombine/load-shufflevector.ll b/llvm/test/Transforms/AggressiveInstCombine/load-shufflevector.ll
new file mode 100644
index 0000000000000..3f6c8334e61cf
--- /dev/null
+++ b/llvm/test/Transforms/AggressiveInstCombine/load-shufflevector.ll
@@ -0,0 +1,345 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=aggressive-instcombine -S < %s | FileCheck %s
+
+define <8 x half> @shuffle_v4_v8f16_r0_1(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr {
+; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_r0_1(
+; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], align 32
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: ret <8 x half> [[TMP1]]
+;
+entry:
+ %val0 = load <4 x half>, ptr addrspace(1) %arg0, align 32
+ %val1 = shufflevector <4 x half> %val0, <4 x half> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
+ ret <8 x half> %val1
+}
+
+define <8 x half> @shuffle_v4_v8f16_r0_2(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr {
+; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_r0_2(
+; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], align 32
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT: ret <8 x half> [[TMP1]]
+;
+entry:
+ %val0 = load <4 x half>, ptr addrspace(1) %arg0, align 32
+ %val1 = shufflevector <4 x half> %val0, <4 x half> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 2, i32 2, i32 2, i32 2>
+ ret <8 x half> %val1
+}
+
+define <4 x half> @shuffle_v4_v4f16_r1_2(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr {
+; CHECK-LABEL: define <4 x half> @shuffle_v4_v4f16_r1_2(
+; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], align 32
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
+; CHECK-NEXT: ret <4 x half> [[TMP1]]
+;
+entry:
+ %val0 = load <4 x half>, ptr addrspace(1) %arg0, align 32
+ %val1 = shufflevector <4 x half> %val0, <4 x half> poison, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
+ ret <4 x half> %val1
+}
+
+define <8 x half> @shuffle_v4_v8f16_r1_2(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr {
+; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_r1_2(
+; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], align 32
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT: ret <8 x half> [[TMP1]]
+;
+entry:
+ %val0 = load <4 x half>, ptr addrspace(1) %arg0, align 32
+ %val1 = shufflevector <4 x half> %val0, <4 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2>
+ ret <8 x half> %val1
+}
+
+define <8 x half> @shuffle_v4_v8f16_cond_r0_1(ptr addrspace(1) nocapture readonly %arg0, i1 %cond) local_unnamed_addr {
+; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_cond_r0_1(
+; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], align 32
+; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: br label %[[FINALLY:.*]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: br label %[[FINALLY]]
+; CHECK: [[FINALLY]]:
+; CHECK-NEXT: [[VAL3:%.*]] = phi <8 x half> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT: ret <8 x half> [[VAL3]]
+;
+entry:
+ %val0 = load <4 x half>, ptr addrspace(1) %arg0, align 32
+ br i1 %cond, label %then, label %else
+
+then:
+ %val1 = shufflevector <4 x half> %val0, <4 x half> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ br label %finally
+
+else:
+ %val2 = shufflevector <4 x half> %val0, <4 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ br label %finally
+
+finally:
+ %val3 = phi <8 x half> [ %val1, %then ], [ %val2, %else ]
+ ret <8 x half> %val3
+}
+
+define <4 x half> @shuffle_v4_v4f16_cond_r1_2(ptr addrspace(1) nocapture readonly %arg0, i1 %cond) local_unnamed_addr {
+; CHECK-LABEL: define <4 x half> @shuffle_v4_v4f16_cond_r1_2(
+; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], align 32
+; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: br label %[[FINALLY:.*]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT: br label %[[FINALLY]]
+; CHECK: [[FINALLY]]:
+; CHECK-NEXT: [[VAL3:%.*]] = phi <4 x half> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT: ret <4 x half> [[VAL3]]
+;
+entry:
+ %val0 = load <4 x half>, ptr addrspace(1) %arg0, align 32
+ br i1 %cond, label %then, label %else
+
+then:
+ %val1 = shufflevector <4 x half> %val0, <4 x half> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ br label %finally
+
+else:
+ %val2 = shufflevector <4 x half> %val0, <4 x half> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+ br label %finally
+
+finally:
+ %val3 = phi <4 x half> [ %val1, %then ], [ %val2, %else ]
+ ret <4 x half> %val3
+}
+
+define <8 x half> @shuffle_v4_v8f16_cond_r1_2(ptr addrspace(1) nocapture readonly %arg0, i1 %cond) local_unnamed_addr {
+; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_cond_r1_2(
+; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], align 32
+; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: br label %[[FINALLY:.*]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT: br label %[[FINALLY]]
+; CHECK: [[FINALLY]]:
+; CHECK-NEXT: [[VAL3:%.*]] = phi <8 x half> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT: ret <8 x half> [[VAL3]]
+;
+entry:
+ %val0 = load <4 x half>, ptr addrspace(1) %arg0, align 32
+ br i1 %cond, label %then, label %else
+
+then:
+ %val1 = shufflevector <4 x half> %val0, <4 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ br label %finally
+
+else:
+ %val2 = shufflevector <4 x half> %val0, <4 x half> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+ br label %finally
+
+finally:
+ %val3 = phi <8 x half> [ %val1, %then ], [ %val2, %else ]
+ ret <8 x half> %val3
+}
+
+define <8 x i32> @shuffle_v4_v8i32_r0_1(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr {
+; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_r0_1(
+; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], align 32
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
+entry:
+ %val0 = load <4 x i32>, ptr addrspace(1) %arg0, align 32
+ %val1 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
+ ret <8 x i32> %val1
+}
+
+define <8 x i32> @shuffle_v4_v8i32_r0_2(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr {
+; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_r0_2(
+; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], align 32
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
+entry:
+ %val0 = load <4 x i32>, ptr addrspace(1) %arg0, align 32
+ %val1 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 2, i32 2, i32 2, i32 2>
+ ret <8 x i32> %val1
+}
+
+define <4 x i32> @shuffle_v4_v4i32_r1_2(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr {
+; CHECK-LABEL: define <4 x i32> @shuffle_v4_v4i32_r1_2(
+; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], align 32
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+entry:
+ %val0 = load <4 x i32>, ptr addrspace(1) %arg0, align 32
+ %val1 = shufflevector <4 x i32> %val0, <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
+ ret <4 x i32> %val1
+}
+
+define <8 x i32> @shuffle_v4_v8i32_r1_2(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr {
+; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_r1_2(
+; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], align 32
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
+entry:
+ %val0 = load <4 x i32>, ptr addrspace(1) %arg0, align 32
+ %val1 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2>
+ ret <8 x i32> %val1
+}
+
+define <8 x i32> @shuffle_v4_v8i32_cond_r0_1(ptr addrspace(1) nocapture readonly %arg0, i1 %cond) local_unnamed_addr {
+; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_cond_r0_1(
+; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], align 32
+; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: br label %[[FINALLY:.*]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: br label %[[FINALLY]]
+; CHECK: [[FINALLY]]:
+; CHECK-NEXT: [[VAL3:%.*]] = phi <8 x i32> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT: ret <8 x i32> [[VAL3]]
+;
+entry:
+ %val0 = load <4 x i32>, ptr addrspace(1) %arg0, align 32
+ br i1 %cond, label %then, label %else
+
+then:
+ %val1 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ br label %finally
+
+else:
+ %val2 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ br label %finally
+
+finally:
+ %val3 = phi <8 x i32> [ %val1, %then ], [ %val2, %else ]
+ ret <8 x i32> %val3
+}
+
+define <8 x i32> @shuffle_v4_v8i32_cond_r0_2(ptr addrspace(1) nocapture readonly %arg0, i1 %cond) local_unnamed_addr {
+; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_cond_r0_2(
+; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], align 32
+; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: br label %[[FINALLY:.*]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT: br label %[[FINALLY]]
+; CHECK: [[FINALLY]]:
+; CHECK-NEXT: [[VAL3:%.*]] = phi <8 x i32> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT: ret <8 x i32> [[VAL3]]
+;
+entry:
+ %val0 = load <4 x i32>, ptr addrspace(1) %arg0, align 32
+ br i1 %cond, label %then, label %else
+
+then:
+ %val1 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ br label %finally
+
+else:
+ %val2 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+ br label %finally
+
+finally:
+ %val3 = phi <8 x i32> [ %val1, %then ], [ %val2, %else ]
+ ret <8 x i32> %val3
+}
+
+define <4 x i32> @shuffle_v4_v4i32_cond_r1_2(ptr addrspace(1) nocapture readonly %arg0, i1 %cond) local_unnamed_addr {
+; CHECK-LABEL: define <4 x i32> @shuffle_v4_v4i32_cond_r1_2(
+; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], align 32
+; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: br label %[[FINALLY:.*]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT: br label %[[FINALLY]]
+; CHECK: [[FINALLY]]:
+; CHECK-NEXT: [[VAL3:%.*]] = phi <4 x i32> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT: ret <4 x i32> [[VAL3]]
+;
+entry:
+ %val0 = load <4 x i32>, ptr addrspace(1) %arg0, align 32
+ br i1 %cond, label %then, label %else
+
+then:
+ %val1 = shufflevector <4 x i32> %val0, <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ br label %finally
+
+else:
+ %val2 = shufflevector <4 x i32> %val0, <4 x i32> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+ br label %finally
+
+finally:
+ %val3 = phi <4 x i32> [ %val1, %then ], [ %val2, %else ]
+ ret <4 x i32> %val3
+}
+
+define <8 x i32> @shuffle_v4_v8i32_cond_r1_2(ptr addrspace(1) nocapture readonly %arg0, i1 %cond) local_unnamed_addr {
+; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_cond_r1_2(
+; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], align 32
+; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: br label %[[FINALLY:.*]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT: br label %[[FINALLY]]
+; CHECK: [[FINALLY]]:
+; CHECK-NEXT: [[VAL3:%.*]] = phi <8 x i32> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT: ret <8 x i32> [[VAL3]]
+;
+entry:
+ %val0 = load <4 x i32>, ptr addrspace(1) %arg0, align 32
+ br i1 %cond, label %then, label %else
+
+then:
+ %val1 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ br label %finally
+
+else:
+ %val2 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+ br label %finally
+
+finally:
+ %val3 = phi <8 x i32> [ %val1, %then ], [ %val2, %else ]
+ ret <8 x i32> %val3
+}
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This needs to be a cost-model driven transform in VectorCombine. Shrinking vector loads is not always profitable.
In the SystemZ tests, the variables involved have been deliberately marked |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is your use case for this? We're already fighting the effects of DAGCombine reduceLoadWidth and I'm worried this is going to make it worse.
The original use case came from FireRender. There were a number of places where only one element of an incoming vector is used in a shufflevector broadcast. To quote:
|
✅ With the latest revision this PR passed the undef deprecator. |
Convert a BUILD_VECTOR of scalar values to a shuffle of shuffles that will lower to AVX blend. This addresses a regression in #128938. --------- Co-authored-by: Leon Clark <[email protected]>
) Convert a BUILD_VECTOR of scalar values to a shuffle of shuffles that will lower to AVX blend. This addresses a regression in llvm#128938. --------- Co-authored-by: Leon Clark <[email protected]>
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM - cheers
target triple = "x86_64-unknown-linux-gnu"
; Reproducer posted in the review thread.
; NOTE(review): presumably this is the input behind the crash regression
; mentioned in the follow-up comments -- confirm against the revert PR.
;
; Loads a <16 x i8> vector from %arg (align 1) and shuffles it with a constant
; second operand. Result lanes 0-7 take elements 0-7 of the loaded vector.
; Result lanes 8-15 use mask indices 24-31, which select elements 8-15 of the
; second operand; those elements are all undef. The poison elements (0-7) of
; the second operand are never selected by the mask.
define <16 x i8> @test(ptr %arg) {
%load= load <16 x i8>, ptr %arg, align 1
%shuf = shufflevector <16 x i8> %load, <16 x i8> <i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
ret <16 x i8> %shuf
}
|
@PeddleSpam I'm going to revert this - please can you investigate the regression? |
… rebroadcasts" (#151960) Reverts llvm/llvm-project#128938 while a crash regression is investigated
@RKSimon thanks, yes I'll take a look. |
…#128938) Attempt to shrink the size of vector loads where only some of the incoming lanes are used for rebroadcasts in shufflevector instructions. --------- Co-authored-by: Leon Clark <[email protected]> Co-authored-by: Simon Pilgrim <[email protected]>
…ts" (llvm#151960) Reverts llvm#128938 while a crash regression is investigated
…3138) Reopen #128938. Attempt to shrink the size of vector loads where only some of the incoming lanes are used for rebroadcasts in shufflevector instructions. --------- Co-authored-by: Leon Clark <[email protected]> Co-authored-by: Simon Pilgrim <[email protected]>
Attempt to narrow a phi of shufflevector instructions where the two incoming values have the same operands but different masks. Related to #128938. --------- Co-authored-by: Leon Clark <[email protected]>
Attempt to shrink the size of vector loads where only some of the incoming lanes are used for rebroadcasts in shufflevector instructions.